{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a20b9db6-46be-46a5-990a-fe2eef70d3e4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-13T07:42:14.622883Z",
     "iopub.status.busy": "2024-11-13T07:42:14.622314Z",
     "iopub.status.idle": "2024-11-13T07:45:33.732054Z",
     "msg_id": "72e6b4c6-e4de-4190-9bf4-e137bcc0b558",
     "shell.execute_reply": "2024-11-13T07:45:33.731237Z",
     "shell.execute_reply.started": "2024-11-13T07:42:14.622853Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "begin train word2vec\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_1737886/2042289992.py:27: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  data[col2] = data[col2].astype(str)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    6585066\n",
      "2    5850610\n",
      "1    5658252\n",
      "Name: date_months_to_now, dtype: int64\n",
      "                            CUST_NO  date_months_to_now  \\\n",
      "0  0000436ed9d65dd70f8ccc904947595e                   0   \n",
      "1  0000436ed9d65dd70f8ccc904947595e                   1   \n",
      "2  0000436ed9d65dd70f8ccc904947595e                   2   \n",
      "3  00005a3b6d6ad8a3624d07622caf2c3f                   0   \n",
      "4  00005a3b6d6ad8a3624d07622caf2c3f                   1   \n",
      "\n",
      "                 mb_pageview_dtl_REFERRER_TITLE_text  \n",
      "0  [3f54b3d12dcecc882dccfd4e422360f2_c36a4b6527a8...  \n",
      "1  [6bb41067851861502c74edd3b31005ae_428a1709f794...  \n",
      "2  [99e968a9ee67ca8a825870088c1ea915_769dab657beb...  \n",
      "3  [8f8869df6bf7afd348c625dc682dbbce_99e968a9ee67...  \n",
      "4  [fb05b0d0036d613437aefd9b83e6e01f_769dab657beb...  \n",
      "150645\n",
      "39\n",
      "load model\n",
      "begin make feature\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 150645/150645 [01:25<00:00, 1754.24it/s]\n"
     ]
    }
   ],
   "source": [
    "# 6.2.2 操作页面Word2Vec特征\n",
    "import os\n",
    "import random\n",
    "from gensim.models import Word2Vec\n",
    "import gc\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "\n",
    "def set_seed(seed=2020):\n",
    "    \"\"\"Seed the stdlib ``random`` module and NumPy's global generator.\n",
    "\n",
    "    The same value is also exported as the ``PYTHONHASHSEED`` environment variable.\n",
    "    NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this\n",
    "    assignment presumably only influences subprocesses started afterwards -- confirm.\n",
    "\n",
    "    Args:\n",
    "        seed: Value passed to ``random.seed`` and ``np.random.seed``.\n",
    "    \"\"\"\n",
    "    random.seed(seed)\n",
    "    os.environ['PYTHONHASHSEED'] = f'{seed}'\n",
    "    np.random.seed(seed)\n",
    "# Fix the global RNG state before anything else in this cell runs.\n",
    "set_seed(2020)\n",
    "\n",
    "# Training-split page-view log. Every column except CUST_NO / FLAG gets the\n",
    "# 'mb_pageview_dtl_' prefix; those two keep their original names.\n",
    "mb_pageview_dtl_train = pd.read_csv('../contest/train/GTGSH_MB_PAGEVIEW_DTL_TRAIN.csv')\n",
    "mb_pageview_dtl_train.columns = [\n",
    "    name if name in ('CUST_NO', 'FLAG') else 'mb_pageview_dtl_' + name\n",
    "    for name in mb_pageview_dtl_train.columns\n",
    "]\n",
    "\n",
    "# Test-split (A) page-view log, renamed with the same rule.\n",
    "mb_pageview_dtl_test_a = pd.read_csv('../contest/A/GTGSH_MB_PAGEVIEW_DTL_A.csv')\n",
    "mb_pageview_dtl_test_a.columns = [\n",
    "    name if name in ('CUST_NO', 'FLAG') else 'mb_pageview_dtl_' + name\n",
    "    for name in mb_pageview_dtl_test_a.columns\n",
    "]\n",
    "\n",
    "\n",
    "def _mean_token_vectors(sentences, keyed_vectors, emb_size):\n",
    "    \"\"\"Return one row per sentence: the mean vector of its in-vocabulary tokens.\n",
    "\n",
    "    A sentence without any in-vocabulary token yields an all-zero row. The result\n",
    "    is always 2-D with ``emb_size`` columns, including when ``sentences`` is empty.\n",
    "    \"\"\"\n",
    "    zero_row = np.zeros(emb_size, dtype=np.float32)\n",
    "    rows = []\n",
    "    for tokens in tqdm(sentences):\n",
    "        known = [keyed_vectors[tok] for tok in tokens if tok in keyed_vectors]\n",
    "        rows.append(np.mean(known, axis=0) if known else zero_row)\n",
    "    # reshape keeps the (n_sentences, emb_size) shape even when there are no rows\n",
    "    return np.asarray(rows).reshape(-1, emb_size)\n",
    "\n",
    "\n",
    "def get_w2v_feature(data, col1, col2, emb_size, type_, ext, feature=None):\n",
    "    \"\"\"Embed grouped token sequences with Word2Vec and average them per group.\n",
    "\n",
    "    Rows of ``data`` are grouped by the ``col1`` keys; inside each group the values\n",
    "    of ``col2`` (cast to ``str``) form one sentence. A skip-gram Word2Vec model is\n",
    "    loaded from ``./<'_'.join(col1)>_<col2>_feature<ext>_<type_>.model`` when that\n",
    "    file exists, otherwise it is trained on the sentences and saved to that path.\n",
    "    Each sentence is then represented by the mean of its token vectors.\n",
    "\n",
    "    Args:\n",
    "        data: DataFrame containing every column named in ``col1`` plus ``col2``.\n",
    "        col1: List of grouping-key column names.\n",
    "        col2: Name of the column holding the tokens.\n",
    "        emb_size: Embedding width; also the number of feature columns produced.\n",
    "        type_: Tag that is part of the cached model file name.\n",
    "        ext: Suffix used in the model file name and in the feature column names.\n",
    "        feature: Optional list that the new column names are appended to. A fresh\n",
    "            list is created when omitted, so names never leak between calls.\n",
    "\n",
    "    Returns:\n",
    "        ``(frame, feature)``: ``frame`` has one row per ``col1`` group with the key\n",
    "        columns followed by ``emb_size`` columns named\n",
    "        ``<'_'.join(col1)>_<col2>_emb_<i>_<ext>``; ``feature`` is the name list.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If a cached model's vector width differs from ``emb_size``.\n",
    "    \"\"\"\n",
    "    if feature is None:\n",
    "        feature = []\n",
    "\n",
    "    print('begin train word2vec')\n",
    "    # astype with a mapping returns a new frame, so nothing is assigned into a\n",
    "    # slice of the caller's data (this is what raised SettingWithCopyWarning).\n",
    "    data = data[col1 + [col2]].astype({col2: str})\n",
    "    if 'date_months_to_now' in data.columns:\n",
    "        # Debug output only; skipped when the month bucket is not one of the keys.\n",
    "        print(data['date_months_to_now'].value_counts())\n",
    "\n",
    "    tmp = data.groupby(col1)[col2].apply(list).reset_index()\n",
    "    sentences = tmp[col2].values.tolist()\n",
    "    print(tmp.head())\n",
    "    print(len(sentences))\n",
    "    if sentences:\n",
    "        print(len(sentences[0]))\n",
    "    del tmp[col2]\n",
    "\n",
    "    key_name = '_'.join(col1)\n",
    "    names = ['{}_{}_emb_{}_{}'.format(key_name, col2, i, ext) for i in range(emb_size)]\n",
    "\n",
    "    if sentences:\n",
    "        model_path = './{}_{}_feature{}_{}.model'.format(key_name, col2, ext, type_)\n",
    "        if os.path.exists(model_path):\n",
    "            print('load model')\n",
    "            model = Word2Vec.load(model_path)\n",
    "            # The cache is keyed only by names, so guard against a stale file\n",
    "            # that was trained with a different embedding width.\n",
    "            if model.wv.vector_size != emb_size:\n",
    "                raise ValueError(\n",
    "                    'cached model {} has vector_size={} but emb_size={} was requested'.format(\n",
    "                        model_path, model.wv.vector_size, emb_size))\n",
    "        else:\n",
    "            print('new model')\n",
    "            # NOTE(review): the gensim docs state that multi-worker training is not\n",
    "            # exactly reproducible even with a fixed seed -- confirm if that matters.\n",
    "            model = Word2Vec(sentences, vector_size=emb_size, window=10, min_count=1,\n",
    "                             sg=1, seed=42, epochs=10, workers=64)\n",
    "            model.save(model_path)\n",
    "\n",
    "        print('begin make feature')\n",
    "        emb_matrix = _mean_token_vectors(sentences, model.wv, emb_size)\n",
    "    else:\n",
    "        # No groups at all: return the key columns with empty feature columns\n",
    "        # instead of failing while training or indexing.\n",
    "        emb_matrix = np.zeros((0, emb_size), dtype=np.float32)\n",
    "\n",
    "    emb_frame = pd.DataFrame(emb_matrix, columns=names, index=tmp.index)\n",
    "    tmp = pd.concat([tmp, emb_frame], axis=1)\n",
    "    feature.extend(names)\n",
    "\n",
    "    return tmp, feature\n",
    "\n",
    "def gen_embed(df_train, df_test, abstract):\n",
    "    \"\"\"Build per-customer, per-month Word2Vec features from page-view logs.\n",
    "\n",
    "    Every log row becomes one token ``'<PAGE_TITLE>_<REFERRER_TITLE>'``. Only tokens\n",
    "    that occur in both frames are kept; the remaining train and test rows are pooled\n",
    "    and passed to ``get_w2v_feature`` grouped by ``CUST_NO`` and month bucket, with\n",
    "    32-dimensional embeddings.\n",
    "\n",
    "    Side effect: both input frames are modified in place.\n",
    "    ``mb_pageview_dtl_OPERATION_DATE`` is parsed to datetime and the columns\n",
    "    ``date_months_to_now``, ``date_weeks_to_now`` and ``date_days_to_now`` are added.\n",
    "    The distances are measured from each frame's own latest operation date.\n",
    "\n",
    "    Args:\n",
    "        df_train: Training page-view log with ``CUST_NO`` and the prefixed\n",
    "            ``mb_pageview_dtl_*`` columns used below.\n",
    "        df_test: Test page-view log with the same columns.\n",
    "        abstract: Tag forwarded to ``get_w2v_feature`` as ``type_`` (part of the\n",
    "            cached model file name).\n",
    "\n",
    "    Returns:\n",
    "        Three DataFrames for month buckets 0, 1 and 2 (0 is the most recent), each\n",
    "        with ``CUST_NO`` plus the embedding columns suffixed ``_First``, ``_Second``\n",
    "        and ``_Third`` respectively.\n",
    "    \"\"\"\n",
    "    date_col = 'mb_pageview_dtl_OPERATION_DATE'\n",
    "    text_col = 'mb_pageview_dtl_REFERRER_TITLE_text'\n",
    "\n",
    "    def get_days_to_now(df):\n",
    "        \"\"\"Add day / week / 31-day-month distances to the frame's latest date (in place).\"\"\"\n",
    "        df[date_col] = pd.to_datetime(df[date_col], format='%Y%m%d')\n",
    "        days_to_now = (df[date_col].max() - df[date_col]).dt.days\n",
    "        df['date_months_to_now'] = days_to_now // 31  # months are approximated as 31-day buckets\n",
    "        df['date_weeks_to_now'] = days_to_now // 7\n",
    "        df['date_days_to_now'] = days_to_now\n",
    "        return df\n",
    "\n",
    "    def with_transition_text(df):\n",
    "        \"\"\"Return the key columns plus the '<page>_<referrer>' token column as a new frame.\"\"\"\n",
    "        subset = df[['CUST_NO', 'date_months_to_now']].copy()\n",
    "        subset[text_col] = (df['mb_pageview_dtl_PAGE_TITLE'].astype(str) + '_'\n",
    "                            + df['mb_pageview_dtl_REFERRER_TITLE'].astype(str))\n",
    "        return subset\n",
    "\n",
    "    def month_slice(df_emb, month, suffix):\n",
    "        \"\"\"Select one month bucket, drop the bucket column and suffix the feature names.\"\"\"\n",
    "        part = df_emb[df_emb['date_months_to_now'] == month].reset_index(drop=True)\n",
    "        part = part.drop(columns='date_months_to_now')\n",
    "        # Rename by column name so the result does not depend on CUST_NO being first.\n",
    "        return part.rename(columns=lambda name: name if name == 'CUST_NO' else name + suffix)\n",
    "\n",
    "    # Date conversion (mutates the caller's frames, see docstring).\n",
    "    df_train = get_days_to_now(df_train)\n",
    "    df_test = get_days_to_now(df_test)\n",
    "\n",
    "    # Token construction.\n",
    "    temp1 = with_transition_text(df_train)\n",
    "    temp2 = with_transition_text(df_test)\n",
    "\n",
    "    # Keep only tokens seen in both train and test, then pool the rows.\n",
    "    common_word = set(temp1[text_col]).intersection(set(temp2[text_col]))\n",
    "    temp = pd.concat(\n",
    "        [temp1[temp1[text_col].isin(common_word)], temp2[temp2[text_col].isin(common_word)]],\n",
    "        axis=0,\n",
    "    )\n",
    "\n",
    "    # One embedding row per (customer, month bucket).\n",
    "    df_temp, _ = get_w2v_feature(temp, ['CUST_NO', 'date_months_to_now'], text_col, 32, abstract, ext='32', feature=[])\n",
    "\n",
    "    temp_month_1 = month_slice(df_temp, 0, '_First')\n",
    "    temp_month_2 = month_slice(df_temp, 1, '_Second')\n",
    "    temp_month_3 = month_slice(df_temp, 2, '_Third')\n",
    "\n",
    "    return temp_month_1, temp_month_2, temp_month_3\n",
    "    \n",
    "\n",
    "# Run the pipeline on the training log plus test split A; the tag is forwarded\n",
    "# to get_w2v_feature, where it becomes part of the cached model file name.\n",
    "mb_embed_month_1, mb_embed_month_2, mb_embed_month_3 = gen_embed(\n",
    "    df_train=mb_pageview_dtl_train,\n",
    "    df_test=mb_pageview_dtl_test_a,\n",
    "    abstract=\"train_A\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0ce70720-e5e5-4e59-ba06-c52fd5e60483",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-13T07:45:33.733815Z",
     "iopub.status.busy": "2024-11-13T07:45:33.733450Z",
     "iopub.status.idle": "2024-11-13T07:45:35.540061Z",
     "msg_id": "1c8132de-2c45-4e27-921c-e354e249a177",
     "shell.execute_reply": "2024-11-13T07:45:35.539252Z",
     "shell.execute_reply.started": "2024-11-13T07:45:33.733788Z"
    }
   },
   "outputs": [],
   "source": [
    "# Persist the month-bucket-0 embeddings (CUST_NO + feature columns) without the row index.\n",
    "# NOTE(review): mb_embed_month_2 / mb_embed_month_3 are not written by this cell.\n",
    "mb_embed_month_1.to_csv('../process/mb_pag_ref_w2v.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "4b3e472c-7af8-4de4-9406-00f7ee1fbf19",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-13T09:11:09.395178Z",
     "iopub.status.busy": "2024-11-13T09:11:09.394699Z",
     "iopub.status.idle": "2024-11-13T09:11:09.440211Z",
     "msg_id": "7cb0a269-1ed1-4547-a065-c910acf37e13",
     "shell.execute_reply": "2024-11-13T09:11:09.439535Z",
     "shell.execute_reply.started": "2024-11-13T09:11:09.395149Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CUST_NO</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_0_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_1_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_2_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_3_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_4_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_5_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_6_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_7_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_8_32_First</th>\n",
       "      <th>...</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_22_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_23_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_24_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_25_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_26_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_27_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_28_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_29_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_30_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_31_32_First</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0000436ed9d65dd70f8ccc904947595e</td>\n",
       "      <td>-0.113331</td>\n",
       "      <td>0.266947</td>\n",
       "      <td>-0.368916</td>\n",
       "      <td>0.605203</td>\n",
       "      <td>-0.087623</td>\n",
       "      <td>0.295798</td>\n",
       "      <td>-0.276558</td>\n",
       "      <td>0.115125</td>\n",
       "      <td>-0.322745</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.144956</td>\n",
       "      <td>0.413002</td>\n",
       "      <td>0.120592</td>\n",
       "      <td>-0.271420</td>\n",
       "      <td>-0.131984</td>\n",
       "      <td>0.132462</td>\n",
       "      <td>-0.173853</td>\n",
       "      <td>0.019005</td>\n",
       "      <td>-0.116133</td>\n",
       "      <td>0.817674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>00005a3b6d6ad8a3624d07622caf2c3f</td>\n",
       "      <td>-0.104646</td>\n",
       "      <td>0.176043</td>\n",
       "      <td>-0.417991</td>\n",
       "      <td>0.340617</td>\n",
       "      <td>-0.008003</td>\n",
       "      <td>0.613948</td>\n",
       "      <td>-0.002455</td>\n",
       "      <td>0.240211</td>\n",
       "      <td>-0.560098</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.030927</td>\n",
       "      <td>0.153632</td>\n",
       "      <td>0.178151</td>\n",
       "      <td>-0.709385</td>\n",
       "      <td>-0.052036</td>\n",
       "      <td>-0.306877</td>\n",
       "      <td>-0.230123</td>\n",
       "      <td>0.462944</td>\n",
       "      <td>0.205001</td>\n",
       "      <td>0.712272</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0001cd907f5e282c2df630089008a102</td>\n",
       "      <td>0.174361</td>\n",
       "      <td>0.463437</td>\n",
       "      <td>-0.507842</td>\n",
       "      <td>0.359331</td>\n",
       "      <td>0.046387</td>\n",
       "      <td>0.340061</td>\n",
       "      <td>-0.347735</td>\n",
       "      <td>0.187928</td>\n",
       "      <td>0.017493</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.213757</td>\n",
       "      <td>0.754868</td>\n",
       "      <td>0.434371</td>\n",
       "      <td>-0.871753</td>\n",
       "      <td>0.328308</td>\n",
       "      <td>-0.514331</td>\n",
       "      <td>-0.398787</td>\n",
       "      <td>0.648119</td>\n",
       "      <td>0.318217</td>\n",
       "      <td>-0.102214</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>000331eef7c8927311b39ce3fa703aec</td>\n",
       "      <td>-0.187133</td>\n",
       "      <td>0.060922</td>\n",
       "      <td>-0.558356</td>\n",
       "      <td>0.339091</td>\n",
       "      <td>0.030745</td>\n",
       "      <td>0.428289</td>\n",
       "      <td>-0.210660</td>\n",
       "      <td>0.217337</td>\n",
       "      <td>0.001979</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.363051</td>\n",
       "      <td>0.188095</td>\n",
       "      <td>0.101903</td>\n",
       "      <td>-0.487684</td>\n",
       "      <td>0.126649</td>\n",
       "      <td>0.098532</td>\n",
       "      <td>-0.160320</td>\n",
       "      <td>0.026881</td>\n",
       "      <td>-0.248976</td>\n",
       "      <td>0.032129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>00045e57e7e432d20a943013665e454e</td>\n",
       "      <td>-0.052921</td>\n",
       "      <td>0.242727</td>\n",
       "      <td>-0.620065</td>\n",
       "      <td>0.338309</td>\n",
       "      <td>-0.257210</td>\n",
       "      <td>0.371038</td>\n",
       "      <td>-0.544450</td>\n",
       "      <td>-0.382223</td>\n",
       "      <td>-0.304391</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.085108</td>\n",
       "      <td>0.158300</td>\n",
       "      <td>-0.127017</td>\n",
       "      <td>-0.428262</td>\n",
       "      <td>-0.137178</td>\n",
       "      <td>-0.137715</td>\n",
       "      <td>-0.116258</td>\n",
       "      <td>-0.025325</td>\n",
       "      <td>0.029299</td>\n",
       "      <td>0.196405</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52170</th>\n",
       "      <td>fffc5ca2282730167025299137465605</td>\n",
       "      <td>0.095129</td>\n",
       "      <td>0.383750</td>\n",
       "      <td>-0.468226</td>\n",
       "      <td>0.499424</td>\n",
       "      <td>-0.007442</td>\n",
       "      <td>0.454862</td>\n",
       "      <td>-0.444133</td>\n",
       "      <td>0.191322</td>\n",
       "      <td>-0.286703</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.243785</td>\n",
       "      <td>0.321809</td>\n",
       "      <td>-0.107736</td>\n",
       "      <td>-0.390252</td>\n",
       "      <td>-0.025179</td>\n",
       "      <td>-0.088010</td>\n",
       "      <td>-0.161669</td>\n",
       "      <td>-0.016306</td>\n",
       "      <td>-0.054703</td>\n",
       "      <td>0.572592</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52171</th>\n",
       "      <td>fffc768b30e626a0083dd284e81e933c</td>\n",
       "      <td>-0.135391</td>\n",
       "      <td>0.244256</td>\n",
       "      <td>-0.380279</td>\n",
       "      <td>0.384300</td>\n",
       "      <td>-0.011840</td>\n",
       "      <td>0.461619</td>\n",
       "      <td>-0.326544</td>\n",
       "      <td>0.335104</td>\n",
       "      <td>-0.256635</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.239977</td>\n",
       "      <td>0.239413</td>\n",
       "      <td>-0.213588</td>\n",
       "      <td>-0.304859</td>\n",
       "      <td>-0.051114</td>\n",
       "      <td>-0.020200</td>\n",
       "      <td>-0.144737</td>\n",
       "      <td>0.157772</td>\n",
       "      <td>-0.168718</td>\n",
       "      <td>0.504752</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52172</th>\n",
       "      <td>fffda9aabcc1d6af3bacd41a58c73f34</td>\n",
       "      <td>-0.080127</td>\n",
       "      <td>-0.078953</td>\n",
       "      <td>-0.711737</td>\n",
       "      <td>-0.195134</td>\n",
       "      <td>0.282035</td>\n",
       "      <td>0.458743</td>\n",
       "      <td>-0.425420</td>\n",
       "      <td>0.227710</td>\n",
       "      <td>-0.206769</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.233967</td>\n",
       "      <td>0.390257</td>\n",
       "      <td>-0.072012</td>\n",
       "      <td>-0.097488</td>\n",
       "      <td>0.166206</td>\n",
       "      <td>-0.276566</td>\n",
       "      <td>0.049104</td>\n",
       "      <td>0.387729</td>\n",
       "      <td>0.172605</td>\n",
       "      <td>0.220133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52173</th>\n",
       "      <td>fffdc2bdf96a6d67d51994bb1a930d28</td>\n",
       "      <td>0.049033</td>\n",
       "      <td>-0.061150</td>\n",
       "      <td>-0.534990</td>\n",
       "      <td>0.369379</td>\n",
       "      <td>0.028349</td>\n",
       "      <td>0.269555</td>\n",
       "      <td>-0.143712</td>\n",
       "      <td>-0.013115</td>\n",
       "      <td>0.052868</td>\n",
       "      <td>...</td>\n",
       "      <td>0.192841</td>\n",
       "      <td>0.012191</td>\n",
       "      <td>0.211706</td>\n",
       "      <td>-0.294219</td>\n",
       "      <td>-0.041011</td>\n",
       "      <td>-0.000742</td>\n",
       "      <td>-0.194473</td>\n",
       "      <td>-0.156966</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.263940</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52174</th>\n",
       "      <td>fffdf527d36c6ef44f18c2c8f9fef809</td>\n",
       "      <td>0.138858</td>\n",
       "      <td>0.027456</td>\n",
       "      <td>-0.445840</td>\n",
       "      <td>0.519568</td>\n",
       "      <td>-0.166023</td>\n",
       "      <td>0.472290</td>\n",
       "      <td>-0.255091</td>\n",
       "      <td>-0.607687</td>\n",
       "      <td>-0.287547</td>\n",
       "      <td>...</td>\n",
       "      <td>0.260063</td>\n",
       "      <td>-0.009119</td>\n",
       "      <td>0.080832</td>\n",
       "      <td>-0.366540</td>\n",
       "      <td>-0.074107</td>\n",
       "      <td>0.026828</td>\n",
       "      <td>0.006448</td>\n",
       "      <td>-0.212893</td>\n",
       "      <td>0.055220</td>\n",
       "      <td>-0.027718</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>52175 rows × 33 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                CUST_NO  \\\n",
       "0      0000436ed9d65dd70f8ccc904947595e   \n",
       "1      00005a3b6d6ad8a3624d07622caf2c3f   \n",
       "2      0001cd907f5e282c2df630089008a102   \n",
       "3      000331eef7c8927311b39ce3fa703aec   \n",
       "4      00045e57e7e432d20a943013665e454e   \n",
       "...                                 ...   \n",
       "52170  fffc5ca2282730167025299137465605   \n",
       "52171  fffc768b30e626a0083dd284e81e933c   \n",
       "52172  fffda9aabcc1d6af3bacd41a58c73f34   \n",
       "52173  fffdc2bdf96a6d67d51994bb1a930d28   \n",
       "52174  fffdf527d36c6ef44f18c2c8f9fef809   \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_0_32_First  \\\n",
       "0                                              -0.113331                               \n",
       "1                                              -0.104646                               \n",
       "2                                               0.174361                               \n",
       "3                                              -0.187133                               \n",
       "4                                              -0.052921                               \n",
       "...                                                  ...                               \n",
       "52170                                           0.095129                               \n",
       "52171                                          -0.135391                               \n",
       "52172                                          -0.080127                               \n",
       "52173                                           0.049033                               \n",
       "52174                                           0.138858                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_1_32_First  \\\n",
       "0                                               0.266947                               \n",
       "1                                               0.176043                               \n",
       "2                                               0.463437                               \n",
       "3                                               0.060922                               \n",
       "4                                               0.242727                               \n",
       "...                                                  ...                               \n",
       "52170                                           0.383750                               \n",
       "52171                                           0.244256                               \n",
       "52172                                          -0.078953                               \n",
       "52173                                          -0.061150                               \n",
       "52174                                           0.027456                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_2_32_First  \\\n",
       "0                                              -0.368916                               \n",
       "1                                              -0.417991                               \n",
       "2                                              -0.507842                               \n",
       "3                                              -0.558356                               \n",
       "4                                              -0.620065                               \n",
       "...                                                  ...                               \n",
       "52170                                          -0.468226                               \n",
       "52171                                          -0.380279                               \n",
       "52172                                          -0.711737                               \n",
       "52173                                          -0.534990                               \n",
       "52174                                          -0.445840                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_3_32_First  \\\n",
       "0                                               0.605203                               \n",
       "1                                               0.340617                               \n",
       "2                                               0.359331                               \n",
       "3                                               0.339091                               \n",
       "4                                               0.338309                               \n",
       "...                                                  ...                               \n",
       "52170                                           0.499424                               \n",
       "52171                                           0.384300                               \n",
       "52172                                          -0.195134                               \n",
       "52173                                           0.369379                               \n",
       "52174                                           0.519568                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_4_32_First  \\\n",
       "0                                              -0.087623                               \n",
       "1                                              -0.008003                               \n",
       "2                                               0.046387                               \n",
       "3                                               0.030745                               \n",
       "4                                              -0.257210                               \n",
       "...                                                  ...                               \n",
       "52170                                          -0.007442                               \n",
       "52171                                          -0.011840                               \n",
       "52172                                           0.282035                               \n",
       "52173                                           0.028349                               \n",
       "52174                                          -0.166023                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_5_32_First  \\\n",
       "0                                               0.295798                               \n",
       "1                                               0.613948                               \n",
       "2                                               0.340061                               \n",
       "3                                               0.428289                               \n",
       "4                                               0.371038                               \n",
       "...                                                  ...                               \n",
       "52170                                           0.454862                               \n",
       "52171                                           0.461619                               \n",
       "52172                                           0.458743                               \n",
       "52173                                           0.269555                               \n",
       "52174                                           0.472290                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_6_32_First  \\\n",
       "0                                              -0.276558                               \n",
       "1                                              -0.002455                               \n",
       "2                                              -0.347735                               \n",
       "3                                              -0.210660                               \n",
       "4                                              -0.544450                               \n",
       "...                                                  ...                               \n",
       "52170                                          -0.444133                               \n",
       "52171                                          -0.326544                               \n",
       "52172                                          -0.425420                               \n",
       "52173                                          -0.143712                               \n",
       "52174                                          -0.255091                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_7_32_First  \\\n",
       "0                                               0.115125                               \n",
       "1                                               0.240211                               \n",
       "2                                               0.187928                               \n",
       "3                                               0.217337                               \n",
       "4                                              -0.382223                               \n",
       "...                                                  ...                               \n",
       "52170                                           0.191322                               \n",
       "52171                                           0.335104                               \n",
       "52172                                           0.227710                               \n",
       "52173                                          -0.013115                               \n",
       "52174                                          -0.607687                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_8_32_First  \\\n",
       "0                                              -0.322745                               \n",
       "1                                              -0.560098                               \n",
       "2                                               0.017493                               \n",
       "3                                               0.001979                               \n",
       "4                                              -0.304391                               \n",
       "...                                                  ...                               \n",
       "52170                                          -0.286703                               \n",
       "52171                                          -0.256635                               \n",
       "52172                                          -0.206769                               \n",
       "52173                                           0.052868                               \n",
       "52174                                          -0.287547                               \n",
       "\n",
       "       ...  \\\n",
       "0      ...   \n",
       "1      ...   \n",
       "2      ...   \n",
       "3      ...   \n",
       "4      ...   \n",
       "...    ...   \n",
       "52170  ...   \n",
       "52171  ...   \n",
       "52172  ...   \n",
       "52173  ...   \n",
       "52174  ...   \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_22_32_First  \\\n",
       "0                                              -0.144956                                \n",
       "1                                              -0.030927                                \n",
       "2                                              -0.213757                                \n",
       "3                                              -0.363051                                \n",
       "4                                              -0.085108                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.243785                                \n",
       "52171                                          -0.239977                                \n",
       "52172                                          -0.233967                                \n",
       "52173                                           0.192841                                \n",
       "52174                                           0.260063                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_23_32_First  \\\n",
       "0                                               0.413002                                \n",
       "1                                               0.153632                                \n",
       "2                                               0.754868                                \n",
       "3                                               0.188095                                \n",
       "4                                               0.158300                                \n",
       "...                                                  ...                                \n",
       "52170                                           0.321809                                \n",
       "52171                                           0.239413                                \n",
       "52172                                           0.390257                                \n",
       "52173                                           0.012191                                \n",
       "52174                                          -0.009119                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_24_32_First  \\\n",
       "0                                               0.120592                                \n",
       "1                                               0.178151                                \n",
       "2                                               0.434371                                \n",
       "3                                               0.101903                                \n",
       "4                                              -0.127017                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.107736                                \n",
       "52171                                          -0.213588                                \n",
       "52172                                          -0.072012                                \n",
       "52173                                           0.211706                                \n",
       "52174                                           0.080832                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_25_32_First  \\\n",
       "0                                              -0.271420                                \n",
       "1                                              -0.709385                                \n",
       "2                                              -0.871753                                \n",
       "3                                              -0.487684                                \n",
       "4                                              -0.428262                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.390252                                \n",
       "52171                                          -0.304859                                \n",
       "52172                                          -0.097488                                \n",
       "52173                                          -0.294219                                \n",
       "52174                                          -0.366540                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_26_32_First  \\\n",
       "0                                              -0.131984                                \n",
       "1                                              -0.052036                                \n",
       "2                                               0.328308                                \n",
       "3                                               0.126649                                \n",
       "4                                              -0.137178                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.025179                                \n",
       "52171                                          -0.051114                                \n",
       "52172                                           0.166206                                \n",
       "52173                                          -0.041011                                \n",
       "52174                                          -0.074107                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_27_32_First  \\\n",
       "0                                               0.132462                                \n",
       "1                                              -0.306877                                \n",
       "2                                              -0.514331                                \n",
       "3                                               0.098532                                \n",
       "4                                              -0.137715                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.088010                                \n",
       "52171                                          -0.020200                                \n",
       "52172                                          -0.276566                                \n",
       "52173                                          -0.000742                                \n",
       "52174                                           0.026828                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_28_32_First  \\\n",
       "0                                              -0.173853                                \n",
       "1                                              -0.230123                                \n",
       "2                                              -0.398787                                \n",
       "3                                              -0.160320                                \n",
       "4                                              -0.116258                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.161669                                \n",
       "52171                                          -0.144737                                \n",
       "52172                                           0.049104                                \n",
       "52173                                          -0.194473                                \n",
       "52174                                           0.006448                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_29_32_First  \\\n",
       "0                                               0.019005                                \n",
       "1                                               0.462944                                \n",
       "2                                               0.648119                                \n",
       "3                                               0.026881                                \n",
       "4                                              -0.025325                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.016306                                \n",
       "52171                                           0.157772                                \n",
       "52172                                           0.387729                                \n",
       "52173                                          -0.156966                                \n",
       "52174                                          -0.212893                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_30_32_First  \\\n",
       "0                                              -0.116133                                \n",
       "1                                               0.205001                                \n",
       "2                                               0.318217                                \n",
       "3                                              -0.248976                                \n",
       "4                                               0.029299                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.054703                                \n",
       "52171                                          -0.168718                                \n",
       "52172                                           0.172605                                \n",
       "52173                                           0.000020                                \n",
       "52174                                           0.055220                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_31_32_First  \n",
       "0                                               0.817674                               \n",
       "1                                               0.712272                               \n",
       "2                                              -0.102214                               \n",
       "3                                               0.032129                               \n",
       "4                                               0.196405                               \n",
       "...                                                  ...                               \n",
       "52170                                           0.572592                               \n",
       "52171                                           0.504752                               \n",
       "52172                                           0.220133                               \n",
       "52173                                           0.263940                               \n",
       "52174                                          -0.027718                               \n",
       "\n",
       "[52175 rows x 33 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mb_embed_month_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "d31f1288-4781-4884-a18b-5d34bee5d7b1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-13T09:11:24.703085Z",
     "iopub.status.busy": "2024-11-13T09:11:24.702613Z",
     "iopub.status.idle": "2024-11-13T09:11:25.078307Z",
     "msg_id": "d9f2793f-77d4-47b1-b8a5-b916d9301730",
     "shell.execute_reply": "2024-11-13T09:11:25.077582Z",
     "shell.execute_reply.started": "2024-11-13T09:11:24.703054Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CUST_NO</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_0_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_1_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_2_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_3_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_4_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_5_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_6_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_7_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_8_32_First</th>\n",
       "      <th>...</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_22_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_23_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_24_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_25_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_26_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_27_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_28_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_29_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_30_32_First</th>\n",
       "      <th>CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_31_32_First</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0000436ed9d65dd70f8ccc904947595e</td>\n",
       "      <td>-0.113331</td>\n",
       "      <td>0.266947</td>\n",
       "      <td>-0.368916</td>\n",
       "      <td>0.605203</td>\n",
       "      <td>-0.087623</td>\n",
       "      <td>0.295798</td>\n",
       "      <td>-0.276558</td>\n",
       "      <td>0.115125</td>\n",
       "      <td>-0.322745</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.144956</td>\n",
       "      <td>0.413002</td>\n",
       "      <td>0.120592</td>\n",
       "      <td>-0.271420</td>\n",
       "      <td>-0.131984</td>\n",
       "      <td>0.132462</td>\n",
       "      <td>-0.173853</td>\n",
       "      <td>0.019005</td>\n",
       "      <td>-0.116133</td>\n",
       "      <td>0.817674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>00005a3b6d6ad8a3624d07622caf2c3f</td>\n",
       "      <td>-0.104646</td>\n",
       "      <td>0.176043</td>\n",
       "      <td>-0.417991</td>\n",
       "      <td>0.340617</td>\n",
       "      <td>-0.008003</td>\n",
       "      <td>0.613948</td>\n",
       "      <td>-0.002455</td>\n",
       "      <td>0.240211</td>\n",
       "      <td>-0.560098</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.030927</td>\n",
       "      <td>0.153632</td>\n",
       "      <td>0.178151</td>\n",
       "      <td>-0.709384</td>\n",
       "      <td>-0.052036</td>\n",
       "      <td>-0.306877</td>\n",
       "      <td>-0.230123</td>\n",
       "      <td>0.462944</td>\n",
       "      <td>0.205001</td>\n",
       "      <td>0.712272</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0001cd907f5e282c2df630089008a102</td>\n",
       "      <td>0.174361</td>\n",
       "      <td>0.463437</td>\n",
       "      <td>-0.507842</td>\n",
       "      <td>0.359331</td>\n",
       "      <td>0.046387</td>\n",
       "      <td>0.340061</td>\n",
       "      <td>-0.347735</td>\n",
       "      <td>0.187928</td>\n",
       "      <td>0.017493</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.213757</td>\n",
       "      <td>0.754868</td>\n",
       "      <td>0.434371</td>\n",
       "      <td>-0.871753</td>\n",
       "      <td>0.328308</td>\n",
       "      <td>-0.514331</td>\n",
       "      <td>-0.398787</td>\n",
       "      <td>0.648119</td>\n",
       "      <td>0.318217</td>\n",
       "      <td>-0.102214</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>000331eef7c8927311b39ce3fa703aec</td>\n",
       "      <td>-0.187133</td>\n",
       "      <td>0.060922</td>\n",
       "      <td>-0.558356</td>\n",
       "      <td>0.339091</td>\n",
       "      <td>0.030745</td>\n",
       "      <td>0.428289</td>\n",
       "      <td>-0.210660</td>\n",
       "      <td>0.217337</td>\n",
       "      <td>0.001979</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.363051</td>\n",
       "      <td>0.188095</td>\n",
       "      <td>0.101903</td>\n",
       "      <td>-0.487684</td>\n",
       "      <td>0.126649</td>\n",
       "      <td>0.098532</td>\n",
       "      <td>-0.160320</td>\n",
       "      <td>0.026881</td>\n",
       "      <td>-0.248976</td>\n",
       "      <td>0.032129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>00045e57e7e432d20a943013665e454e</td>\n",
       "      <td>-0.052921</td>\n",
       "      <td>0.242727</td>\n",
       "      <td>-0.620065</td>\n",
       "      <td>0.338309</td>\n",
       "      <td>-0.257210</td>\n",
       "      <td>0.371038</td>\n",
       "      <td>-0.544450</td>\n",
       "      <td>-0.382223</td>\n",
       "      <td>-0.304391</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.085108</td>\n",
       "      <td>0.158300</td>\n",
       "      <td>-0.127017</td>\n",
       "      <td>-0.428262</td>\n",
       "      <td>-0.137178</td>\n",
       "      <td>-0.137715</td>\n",
       "      <td>-0.116258</td>\n",
       "      <td>-0.025325</td>\n",
       "      <td>0.029299</td>\n",
       "      <td>0.196405</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52170</th>\n",
       "      <td>fffc5ca2282730167025299137465605</td>\n",
       "      <td>0.095129</td>\n",
       "      <td>0.383750</td>\n",
       "      <td>-0.468226</td>\n",
       "      <td>0.499424</td>\n",
       "      <td>-0.007442</td>\n",
       "      <td>0.454862</td>\n",
       "      <td>-0.444133</td>\n",
       "      <td>0.191322</td>\n",
       "      <td>-0.286703</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.243785</td>\n",
       "      <td>0.321809</td>\n",
       "      <td>-0.107736</td>\n",
       "      <td>-0.390252</td>\n",
       "      <td>-0.025179</td>\n",
       "      <td>-0.088010</td>\n",
       "      <td>-0.161669</td>\n",
       "      <td>-0.016306</td>\n",
       "      <td>-0.054703</td>\n",
       "      <td>0.572592</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52171</th>\n",
       "      <td>fffc768b30e626a0083dd284e81e933c</td>\n",
       "      <td>-0.135391</td>\n",
       "      <td>0.244256</td>\n",
       "      <td>-0.380279</td>\n",
       "      <td>0.384300</td>\n",
       "      <td>-0.011840</td>\n",
       "      <td>0.461619</td>\n",
       "      <td>-0.326544</td>\n",
       "      <td>0.335104</td>\n",
       "      <td>-0.256635</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.239977</td>\n",
       "      <td>0.239413</td>\n",
       "      <td>-0.213588</td>\n",
       "      <td>-0.304859</td>\n",
       "      <td>-0.051114</td>\n",
       "      <td>-0.020200</td>\n",
       "      <td>-0.144737</td>\n",
       "      <td>0.157772</td>\n",
       "      <td>-0.168718</td>\n",
       "      <td>0.504752</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52172</th>\n",
       "      <td>fffda9aabcc1d6af3bacd41a58c73f34</td>\n",
       "      <td>-0.080127</td>\n",
       "      <td>-0.078953</td>\n",
       "      <td>-0.711737</td>\n",
       "      <td>-0.195134</td>\n",
       "      <td>0.282035</td>\n",
       "      <td>0.458743</td>\n",
       "      <td>-0.425420</td>\n",
       "      <td>0.227710</td>\n",
       "      <td>-0.206769</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.233967</td>\n",
       "      <td>0.390257</td>\n",
       "      <td>-0.072012</td>\n",
       "      <td>-0.097488</td>\n",
       "      <td>0.166206</td>\n",
       "      <td>-0.276566</td>\n",
       "      <td>0.049104</td>\n",
       "      <td>0.387729</td>\n",
       "      <td>0.172605</td>\n",
       "      <td>0.220133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52173</th>\n",
       "      <td>fffdc2bdf96a6d67d51994bb1a930d28</td>\n",
       "      <td>0.049033</td>\n",
       "      <td>-0.061150</td>\n",
       "      <td>-0.534991</td>\n",
       "      <td>0.369379</td>\n",
       "      <td>0.028349</td>\n",
       "      <td>0.269555</td>\n",
       "      <td>-0.143712</td>\n",
       "      <td>-0.013115</td>\n",
       "      <td>0.052868</td>\n",
       "      <td>...</td>\n",
       "      <td>0.192841</td>\n",
       "      <td>0.012191</td>\n",
       "      <td>0.211706</td>\n",
       "      <td>-0.294219</td>\n",
       "      <td>-0.041011</td>\n",
       "      <td>-0.000742</td>\n",
       "      <td>-0.194473</td>\n",
       "      <td>-0.156966</td>\n",
       "      <td>0.000020</td>\n",
       "      <td>0.263940</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52174</th>\n",
       "      <td>fffdf527d36c6ef44f18c2c8f9fef809</td>\n",
       "      <td>0.138858</td>\n",
       "      <td>0.027456</td>\n",
       "      <td>-0.445840</td>\n",
       "      <td>0.519568</td>\n",
       "      <td>-0.166023</td>\n",
       "      <td>0.472290</td>\n",
       "      <td>-0.255091</td>\n",
       "      <td>-0.607687</td>\n",
       "      <td>-0.287547</td>\n",
       "      <td>...</td>\n",
       "      <td>0.260063</td>\n",
       "      <td>-0.009119</td>\n",
       "      <td>0.080832</td>\n",
       "      <td>-0.366540</td>\n",
       "      <td>-0.074107</td>\n",
       "      <td>0.026828</td>\n",
       "      <td>0.006448</td>\n",
       "      <td>-0.212893</td>\n",
       "      <td>0.055220</td>\n",
       "      <td>-0.027718</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>52175 rows × 33 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                CUST_NO  \\\n",
       "0      0000436ed9d65dd70f8ccc904947595e   \n",
       "1      00005a3b6d6ad8a3624d07622caf2c3f   \n",
       "2      0001cd907f5e282c2df630089008a102   \n",
       "3      000331eef7c8927311b39ce3fa703aec   \n",
       "4      00045e57e7e432d20a943013665e454e   \n",
       "...                                 ...   \n",
       "52170  fffc5ca2282730167025299137465605   \n",
       "52171  fffc768b30e626a0083dd284e81e933c   \n",
       "52172  fffda9aabcc1d6af3bacd41a58c73f34   \n",
       "52173  fffdc2bdf96a6d67d51994bb1a930d28   \n",
       "52174  fffdf527d36c6ef44f18c2c8f9fef809   \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_0_32_First  \\\n",
       "0                                              -0.113331                               \n",
       "1                                              -0.104646                               \n",
       "2                                               0.174361                               \n",
       "3                                              -0.187133                               \n",
       "4                                              -0.052921                               \n",
       "...                                                  ...                               \n",
       "52170                                           0.095129                               \n",
       "52171                                          -0.135391                               \n",
       "52172                                          -0.080127                               \n",
       "52173                                           0.049033                               \n",
       "52174                                           0.138858                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_1_32_First  \\\n",
       "0                                               0.266947                               \n",
       "1                                               0.176043                               \n",
       "2                                               0.463437                               \n",
       "3                                               0.060922                               \n",
       "4                                               0.242727                               \n",
       "...                                                  ...                               \n",
       "52170                                           0.383750                               \n",
       "52171                                           0.244256                               \n",
       "52172                                          -0.078953                               \n",
       "52173                                          -0.061150                               \n",
       "52174                                           0.027456                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_2_32_First  \\\n",
       "0                                              -0.368916                               \n",
       "1                                              -0.417991                               \n",
       "2                                              -0.507842                               \n",
       "3                                              -0.558356                               \n",
       "4                                              -0.620065                               \n",
       "...                                                  ...                               \n",
       "52170                                          -0.468226                               \n",
       "52171                                          -0.380279                               \n",
       "52172                                          -0.711737                               \n",
       "52173                                          -0.534991                               \n",
       "52174                                          -0.445840                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_3_32_First  \\\n",
       "0                                               0.605203                               \n",
       "1                                               0.340617                               \n",
       "2                                               0.359331                               \n",
       "3                                               0.339091                               \n",
       "4                                               0.338309                               \n",
       "...                                                  ...                               \n",
       "52170                                           0.499424                               \n",
       "52171                                           0.384300                               \n",
       "52172                                          -0.195134                               \n",
       "52173                                           0.369379                               \n",
       "52174                                           0.519568                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_4_32_First  \\\n",
       "0                                              -0.087623                               \n",
       "1                                              -0.008003                               \n",
       "2                                               0.046387                               \n",
       "3                                               0.030745                               \n",
       "4                                              -0.257210                               \n",
       "...                                                  ...                               \n",
       "52170                                          -0.007442                               \n",
       "52171                                          -0.011840                               \n",
       "52172                                           0.282035                               \n",
       "52173                                           0.028349                               \n",
       "52174                                          -0.166023                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_5_32_First  \\\n",
       "0                                               0.295798                               \n",
       "1                                               0.613948                               \n",
       "2                                               0.340061                               \n",
       "3                                               0.428289                               \n",
       "4                                               0.371038                               \n",
       "...                                                  ...                               \n",
       "52170                                           0.454862                               \n",
       "52171                                           0.461619                               \n",
       "52172                                           0.458743                               \n",
       "52173                                           0.269555                               \n",
       "52174                                           0.472290                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_6_32_First  \\\n",
       "0                                              -0.276558                               \n",
       "1                                              -0.002455                               \n",
       "2                                              -0.347735                               \n",
       "3                                              -0.210660                               \n",
       "4                                              -0.544450                               \n",
       "...                                                  ...                               \n",
       "52170                                          -0.444133                               \n",
       "52171                                          -0.326544                               \n",
       "52172                                          -0.425420                               \n",
       "52173                                          -0.143712                               \n",
       "52174                                          -0.255091                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_7_32_First  \\\n",
       "0                                               0.115125                               \n",
       "1                                               0.240211                               \n",
       "2                                               0.187928                               \n",
       "3                                               0.217337                               \n",
       "4                                              -0.382223                               \n",
       "...                                                  ...                               \n",
       "52170                                           0.191322                               \n",
       "52171                                           0.335104                               \n",
       "52172                                           0.227710                               \n",
       "52173                                          -0.013115                               \n",
       "52174                                          -0.607687                               \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_8_32_First  \\\n",
       "0                                              -0.322745                               \n",
       "1                                              -0.560098                               \n",
       "2                                               0.017493                               \n",
       "3                                               0.001979                               \n",
       "4                                              -0.304391                               \n",
       "...                                                  ...                               \n",
       "52170                                          -0.286703                               \n",
       "52171                                          -0.256635                               \n",
       "52172                                          -0.206769                               \n",
       "52173                                           0.052868                               \n",
       "52174                                          -0.287547                               \n",
       "\n",
       "       ...  \\\n",
       "0      ...   \n",
       "1      ...   \n",
       "2      ...   \n",
       "3      ...   \n",
       "4      ...   \n",
       "...    ...   \n",
       "52170  ...   \n",
       "52171  ...   \n",
       "52172  ...   \n",
       "52173  ...   \n",
       "52174  ...   \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_22_32_First  \\\n",
       "0                                              -0.144956                                \n",
       "1                                              -0.030927                                \n",
       "2                                              -0.213757                                \n",
       "3                                              -0.363051                                \n",
       "4                                              -0.085108                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.243785                                \n",
       "52171                                          -0.239977                                \n",
       "52172                                          -0.233967                                \n",
       "52173                                           0.192841                                \n",
       "52174                                           0.260063                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_23_32_First  \\\n",
       "0                                               0.413002                                \n",
       "1                                               0.153632                                \n",
       "2                                               0.754868                                \n",
       "3                                               0.188095                                \n",
       "4                                               0.158300                                \n",
       "...                                                  ...                                \n",
       "52170                                           0.321809                                \n",
       "52171                                           0.239413                                \n",
       "52172                                           0.390257                                \n",
       "52173                                           0.012191                                \n",
       "52174                                          -0.009119                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_24_32_First  \\\n",
       "0                                               0.120592                                \n",
       "1                                               0.178151                                \n",
       "2                                               0.434371                                \n",
       "3                                               0.101903                                \n",
       "4                                              -0.127017                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.107736                                \n",
       "52171                                          -0.213588                                \n",
       "52172                                          -0.072012                                \n",
       "52173                                           0.211706                                \n",
       "52174                                           0.080832                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_25_32_First  \\\n",
       "0                                              -0.271420                                \n",
       "1                                              -0.709384                                \n",
       "2                                              -0.871753                                \n",
       "3                                              -0.487684                                \n",
       "4                                              -0.428262                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.390252                                \n",
       "52171                                          -0.304859                                \n",
       "52172                                          -0.097488                                \n",
       "52173                                          -0.294219                                \n",
       "52174                                          -0.366540                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_26_32_First  \\\n",
       "0                                              -0.131984                                \n",
       "1                                              -0.052036                                \n",
       "2                                               0.328308                                \n",
       "3                                               0.126649                                \n",
       "4                                              -0.137178                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.025179                                \n",
       "52171                                          -0.051114                                \n",
       "52172                                           0.166206                                \n",
       "52173                                          -0.041011                                \n",
       "52174                                          -0.074107                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_27_32_First  \\\n",
       "0                                               0.132462                                \n",
       "1                                              -0.306877                                \n",
       "2                                              -0.514331                                \n",
       "3                                               0.098532                                \n",
       "4                                              -0.137715                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.088010                                \n",
       "52171                                          -0.020200                                \n",
       "52172                                          -0.276566                                \n",
       "52173                                          -0.000742                                \n",
       "52174                                           0.026828                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_28_32_First  \\\n",
       "0                                              -0.173853                                \n",
       "1                                              -0.230123                                \n",
       "2                                              -0.398787                                \n",
       "3                                              -0.160320                                \n",
       "4                                              -0.116258                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.161669                                \n",
       "52171                                          -0.144737                                \n",
       "52172                                           0.049104                                \n",
       "52173                                          -0.194473                                \n",
       "52174                                           0.006448                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_29_32_First  \\\n",
       "0                                               0.019005                                \n",
       "1                                               0.462944                                \n",
       "2                                               0.648119                                \n",
       "3                                               0.026881                                \n",
       "4                                              -0.025325                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.016306                                \n",
       "52171                                           0.157772                                \n",
       "52172                                           0.387729                                \n",
       "52173                                          -0.156966                                \n",
       "52174                                          -0.212893                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_30_32_First  \\\n",
       "0                                              -0.116133                                \n",
       "1                                               0.205001                                \n",
       "2                                               0.318217                                \n",
       "3                                              -0.248976                                \n",
       "4                                               0.029299                                \n",
       "...                                                  ...                                \n",
       "52170                                          -0.054703                                \n",
       "52171                                          -0.168718                                \n",
       "52172                                           0.172605                                \n",
       "52173                                           0.000020                                \n",
       "52174                                           0.055220                                \n",
       "\n",
       "       CUST_NO_date_months_to_now_mb_pageview_dtl_REFERRER_TITLE_text_emb_31_32_First  \n",
       "0                                               0.817674                               \n",
       "1                                               0.712272                               \n",
       "2                                              -0.102214                               \n",
       "3                                               0.032129                               \n",
       "4                                               0.196405                               \n",
       "...                                                  ...                               \n",
       "52170                                           0.572592                               \n",
       "52171                                           0.504752                               \n",
       "52172                                           0.220133                               \n",
       "52173                                           0.263940                               \n",
       "52174                                          -0.027718                               \n",
       "\n",
       "[52175 rows x 33 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the saved CSV back from disk; the resulting DataFrame is shown as this cell's output.\n",
    "pd.read_csv(filepath_or_buffer=r'../process_m/mb_pag_ref_w2v.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0722c7d-2ff7-41dc-9b6f-5d704bdea016",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
